COVID-19 Vaccine Distribution Reccommendation
northeast <- c("Connecticut", "Maine", "Massachusetts", "New Hampshire", "Rhode Island", "Vermont", "New Jersey", "New York", "Pennsylvania", "New York City")
midwest <- c("Illinois", "Indiana", "Michigan", "Ohio", "Wisconsin", "Iowa", "Kansas", "Minnesota", "Missouri", "Nebraska", "North Dakota", "South Dakota")
south <- c("Delaware", "Oklahoma", "Florida", "Tennessee", "Texas", "Kentucky", "Arkansas", "Louisiana", "Georgia", "Alabama", "Maryland", "North Carolina", "South Carolina", "Virginia", "District of Columbia", "Mississippi", "West Virginia")
west <- c("Arizona", "Colorado", "Idaho", "Montana", "Nevada", "New Mexico", "Utah", "Wyoming", "Alaska", "California", "Hawaii", "Oregon", "Washington")
SexAge$Region <- SexAge$State
for(i in seq(1, nrow(SexAge), 1)){
if (SexAge$State[i] %in% northeast)
{
SexAge$Region[i] <- "northeast"
}
else if (SexAge$State[i] %in% south)
{
SexAge$Region[i] <- "south"
}
else if (SexAge$State[i] %in% midwest)
{
SexAge$Region[i] <- "midwest"
}
else if (SexAge$State[i] %in% west)
{
SexAge$Region[i] <- "west"
}
}
RaceEthnicity$Region <- RaceEthnicity$State
for(i in seq(1, nrow(RaceEthnicity), 1)){
if (RaceEthnicity$State[i] %in% northeast)
{
RaceEthnicity$Region[i] <- "northeast"
}
else if (RaceEthnicity$State[i] %in% south)
{
RaceEthnicity$Region[i] <- "south"
}
else if (RaceEthnicity$State[i] %in% midwest)
{
RaceEthnicity$Region[i] <- "midwest"
}
else if (RaceEthnicity$State[i] %in% west)
{
RaceEthnicity$Region[i] <- "west"
}
}
RaceEthnicity<- pivot_longer(RaceEthnicity, c(`Non.Hispanic.White`,`Non.Hispanic.Black.or.African.American`, `Non.Hispanic.American.Indian.or.Alaska.Native`,`Non.Hispanic.Asian`,`Non.Hispanic.Native.Hawaiian.or.Other.Pacific.Islander`,`Hispanic.or.Latino`,`Other`),names_to="Ethnicity",values_to="Proportion")
Marianna <- RaceEthnicity %>%
filter(Indicator == "Distribution of COVID-19 deaths (%)" | Indicator == "Unweighted distribution of population (%)") %>%
pivot_wider(names_from = "Indicator", values_from = "Proportion") %>%
filter(Region != "United States")%>%
group_by(Region, Ethnicity) %>%
summarize(`Distribution of COVID deaths (%)` = mean(`Distribution of COVID-19 deaths (%)`, na.rm = TRUE), `Distribution of Pop (%)` = mean(`Unweighted distribution of population (%)`, na.rm = TRUE)) %>%
pivot_longer(c("Distribution of COVID deaths (%)", "Distribution of Pop (%)"), names_to = "indicator", values_to="Proportion")
## `summarise()` regrouping output by 'Region' (override with `.groups` argument)
p_1<-ggplot(Marianna, aes(x = Proportion, y = Ethnicity, fill = indicator)) + facet_wrap(~Region) + geom_bar(stat = "identity")
p_1 + theme(legend.position = "bottom", legend.title = element_blank())
#Create a new dataset that only includes COVID deaths variable
COVID_gender <- SexAge %>% select(Sex, State, `COVID-19 Deaths`, `Age group`)
COVID_gender
## # A tibble: 2,661 x 4
## Sex State `COVID-19 Deaths` `Age group`
## <chr> <chr> <dbl> <chr>
## 1 All Sexes United States 261530 All Ages
## 2 All Sexes United States 29 Under 1 year
## 3 All Sexes United States 133 0-17 years
## 4 All Sexes United States 17 1-4 years
## 5 All Sexes United States 46 5-14 years
## 6 All Sexes United States 449 15-24 years
## 7 All Sexes United States 1089 18-29 years
## 8 All Sexes United States 1909 25-34 years
## 9 All Sexes United States 11247 30-49 years
## 10 All Sexes United States 4917 35-44 years
## # … with 2,651 more rows
#BAsed on this table, as of December 9, 2020, there have been a total of 261,530 deaths caused by COVID-19.
COVID_sex <- COVID_gender %>% filter(`Age group`== "All Ages", State=="United States")
COVID_sex
## # A tibble: 4 x 4
## Sex State `COVID-19 Deaths` `Age group`
## <chr> <chr> <dbl> <chr>
## 1 All Sexes United States 261530 All Ages
## 2 Male United States 141640 All Ages
## 3 Female United States 119883 All Ages
## 4 Unknown United States 7 All Ages
#Create a bar plot to show the distribution of deaths by sex
graph <- plot_ly(COVID_sex, x=~Sex, y=~`COVID-19 Deaths`, type="bar", color=~Sex)
graph
## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
COVID_gender <- SexAge %>% select(Sex, State, `COVID-19 Deaths`, `Age group`, Region)
COVID_sex <- COVID_gender %>%
filter(`Age group`== "All Ages") %>%
filter(Region != "United States" & Region != "Puerto Rico")%>%
group_by(Region, Sex) %>%
summarize(deaths = sum(`COVID-19 Deaths`, na.rm = TRUE))
## `summarise()` regrouping output by 'Region' (override with `.groups` argument)
p_2 <- ggplot(COVID_sex, aes(x = COVID_sex$Sex, y = COVID_sex$deaths, fill = COVID_sex$Sex)) + geom_bar(stat = "identity") + facet_wrap(~Region) + ylab("Deaths") + xlab("Sex") + ggtitle("Relationship between Sex and Covid-19 deaths by region") + labs(fill = "Sex")
ggplotly(p_2)
## Warning: `group_by_()` is deprecated as of dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
COVIDage <- Provisional_COVID.19_Death_Counts_by_Sex__Age__and_State
AgeGroup0 <- COVIDage %>%
filter(State=="United States", Sex=="All Sexes",Age.group==c("0-17 years"))
AgeGroup1 <- COVIDage %>%
filter(State=="United States", Sex=="All Sexes",Age.group==c("18-29 years","50-64 years"))
## Warning in Age.group == c("18-29 years", "50-64 years"): longer object length is
## not a multiple of shorter object length
AgeGroup2 <- COVIDage %>%
filter(State=="United States", Sex=="All Sexes",Age.group==c("30-49 years","65-74 years"))
## Warning in Age.group == c("30-49 years", "65-74 years"): longer object length is
## not a multiple of shorter object length
AgeGroup3 <- COVIDage %>%
filter(State=="United States", Sex=="All Sexes",Age.group==c("85 years and over"))
DeathvsAge <- bind_rows(AgeGroup0, AgeGroup1, AgeGroup2, AgeGroup3) %>%
arrange(desc(Age.group)) %>%
select(-c(8:13))
DeathvsAge %>%
ggplot(aes(x=Age.group,y=COVID.19.Deaths)) +
geom_bar(stat="identity", col="red", fill="steelblue")
COVIDage <- SexAge %>%
group_by(`Age group`, Region)%>%
summarize(covidDeaths = sum(`COVID-19 Deaths`, na.rm = TRUE)) %>%
filter(Region != "Puerto Rico" & Region != "United States")%>%
filter(`Age group` != "Under 1 year" & `Age group` != "All Ages" & `Age group` != "1-4 years" & `Age group` != "5-14 years")
## `summarise()` regrouping output by 'Age group' (override with `.groups` argument)
ggplot(COVIDage, aes(x = `Age group`, y = covidDeaths)) + geom_bar(stat = "identity", col = "red", fill = "steelblue") + facet_wrap(~Region) + coord_flip() + ylab("Covid-19 Deaths")
# Code used to produce map of states most impacted by COVID-19
states<-map_data("state")
top_5<- subset(states, region %in% c("texas", "california", "florida", "new jersey", "new york"))
ggplot(data = top_5) +
geom_polygon(aes(x = long, y = lat, group = group), fill = "palegreen", color = "black") +
coord_fixed(1.3)
# Code used to create graph of COVID-19 deaths by state
covid_totals<- COVID_TOTALS_lowercase%>%
select(State, COVID.19.Deaths)
map_covid <- left_join(covid_totals, states, by= c("State"="region"))
ggplot(map_covid, aes(long, lat, group = group)) +
geom_polygon(aes(fill = COVID.19.Deaths, color="yellow"),
colour = alpha("red", 1/2)) +
scale_fill_gradient(low="blue", high="red",
breaks = c(2, 4, 10, 100, 1000, 10000),
trans = "log10")+
geom_polygon(data = states, colour = "black", fill = NA) +
theme_void() +
coord_fixed(1.2)
# Code used to produce graph of COVID-19 deaths by sex in the 5 states with the most COVID-19 deaths
sex_age_state<- Provisional_COVID.19_Death_Counts_by_Sex__Age__and_State
graph_data_ggplot <- sex_age_state%>%
filter(State==c("Texas", "California", "Florida", "New Jersey", "New York"), Sex== c("Male", "Female"))
## Warning in State == c("Texas", "California", "Florida", "New Jersey", "New
## York"): longer object length is not a multiple of shorter object length
## Warning in Sex == c("Male", "Female"): longer object length is not a multiple of
## shorter object length
graph_state_sex<- ggplot(graph_data_ggplot, aes(fill=Sex, y=COVID.19.Deaths, x=State))+
geom_bar(position = "stack", stat = "identity")
graph_state_sex
## Warning: Removed 2 rows containing missing values (position_stack).
# Code used to produce graph of COVID-19 deaths by age in the 5 states with the most COVID-19 deaths
sex_state <- sex_age_state%>%
filter(State==c("Texas", "California", "Florida", "New Jersey", "New York"), Age.group!= "All Ages")
## Warning in State == c("Texas", "California", "Florida", "New Jersey", "New
## York"): longer object length is not a multiple of shorter object length
age_graph <- ggplot(sex_state, aes(fill=Age.group, y=COVID.19.Deaths, x=State))+
geom_bar(position = "stack", stat = "identity")
age_graph
## Warning: Removed 5 rows containing missing values (position_stack).
# Code used to produce graph of COVID-19 deaths by ethnicity in the 5 states with the most COVID-19 deaths
ethnicity_covid<-daata[c(5:208),-c(13)]
newest_ethnicity<- ethnicity_covid%>%
filter(Indicator!= c("Count of COVID-19 deaths","Weighted distribution of population (%)"))
newest_ethnicitycovid<-pivot_longer(newest_ethnicity,c(`Non.Hispanic.White`,`Non.Hispanic.Black.or.African.American`, `Non.Hispanic.American.Indian.or.Alaska.Native`,`Non.Hispanic.Asian`,`Hispanic.or.Latino`,`Other`),names_to="Ethnicity",values_to="Proportion")
new_newest_ethnicitycovid<- newest_ethnicitycovid%>%
filter(State==c("Texas", "California", "Florida", "New Jersey", "New York"))
## Warning in State == c("Texas", "California", "Florida", "New Jersey", "New
## York"): longer object length is not a multiple of shorter object length
race_graph <- ggplot(new_newest_ethnicitycovid, aes(fill=Ethnicity, y=Proportion, x=State))+
geom_bar(position = "stack", stat = "identity")
race_graph